


***Set paths
local dir "..."
local dropbox_data "..."
local dropbox_data_create "..."
local local_data "..."


***locals 
local F = 100


tempfile tempdata 
save `tempdata'


********************************************** 
***** IMPORT HOURLY CANDEAL 
**********************************************

use "`dir'/avg_hourly_candeal_bonds_bills.dta", clear 

unique isin

rename bidprice bidprice_can
rename askprice askprice_can
rename bidyield bidyield_can 
rename askyield askyield_can

replace askyield_can = askyield_can *100
replace bidyield_can = bidyield_can *100

gen date = dofc(hour_stamp)
format date %td

rename hour_stamp exec_time_grid 

*drop anything that is on the weekends if any - there is none (and shouldn't be)
gen dayofweek = dow(date)
drop if dayofweek ==0 | dayofweek==6  // drop weekends
drop dayofweek

keep bidprice_can askprice_can exec_time_grid date isin maturity_date coupon bidyield_can askyield_can 

*drop security that is not a government bond
drop if isin=="CA87262KAB13" // this is not a government bond


tempfile tempCan
save  `tempCan' , replace


********************************************** 
***** IMPORT HOURLY BLOOMBERG DATA 
**********************************************
	
use  "`dir'/bloomberg_2020.dta" , clear
append  using "`dir'/bloomberg_2020_2.dta" 
append  using "`dir'/bloomberg_2020_3.dta" 


rename BidPrice bidprice_bl 
rename AskPrice askprice_bl 
rename BidYield bidyield_bl 
rename AskYield askyield_bl 
rename ISIN isin
rename Coupon coupon_bl

drop if Date==. // 2 lines with missing observations


*normalize 
replace bidyield_bl = bidyield_bl*100 // in bsp
replace askyield_bl = askyield_bl*100 // in bsp

gen double midyield_bl = (askyield_bl+bidyield_bl)/2
gen double midprice_bl = (askprice_bl+bidprice_bl)/2


*correct time 
gen double dateround = round(Date,1000*60*60) // round milli-seconds to minutes, else the ceiling does not work 
format dateround %tc
gen double exec_time_grid = 30*2 *60000 * ceil(dateround/(30*2*60 * 1000))
format exec_time_grid %tc


	*check
	*gen hour2 = hh(exec_time_grid)
	*gen min2  = mm(exec_time_grid)
	*gen time_grid= hms(hour2,min2,00)  
	*format time_grid %tcHH:MM
	*browse Date exec_time_grid  time_grid if time_grid<tc(07:00)
	

gen date = dofc(exec_time_grid)
format date %td


drop if isin=="CA1350Z7WB67" // this bond matures in 2015

gen year = year(date)
drop if year<2016 | year>2019
drop year 

unique isin 

duplicates drop Date isin bidprice_bl askprice_bl bidyield_bl askyield_bl , force

*correct some isin's that had wrong yields
bys date exec_time_grid isin: gen a0=_N
drop if a0==2
tab a0 
drop a0 

gen old =1

*bring in the dropped isins
do "`dropbox_data_create'/bloomberg_duplicates.do"

bys date exec_time_grid isin: gen a0=_N 
tab isin if a0>1 
drop a0 

*drop duplicates
duplicates drop Date isin bidprice_bl askprice_bl bidyield_bl askyield_bl, force

bys date exec_time_grid isin : gen a0=_N 
tab isin if a0>1 

*keep the new ones 
drop if old==1 & a0>1
drop a0 
bys date exec_time_grid isin : gen a0=_N 
tab isin if a0>1 

*only keep what is used 
keep date exec_time_grid isin bidprice_bl askprice_bl midprice_bl bidyield_bl askyield_bl midyield_bl coupon_bl

tempfile tempBloombergBills
save  `tempBloombergBills' , replace


**********************************************
*** MERGE WITH BLOOMBERG BILLS 
**********************************************

use `tempdata', clear
merge m:1  date exec_time_grid isin  using `tempBloombergBills' 

*missing isins:
preserve
collapse IIROC_QUANTITY (min) date Maturity, by(isin _merge)
bys isin: gen a0 = _N
browse  if a0==1 & _merge==1
browse  if a0==1 & _merge==2
count  if a0==1 & _merge==1
restore

*clean up maturity and days-to-maturity
replace settlement_date = date 							 if _merge==2
bys isin: egen a0 = max(Maturity)
format a0 %td
replace Maturity 			= a0                         if Maturity==.
replace days_to_maturity  	= Maturity - settlement_date if days_to_maturity==.
replace days_to_maturity_e 	= Maturity - date            if days_to_maturity_e==.
drop a0

	*analyze which isins we now have 
	preserve
	collapse (max) _merge, by(isin)
	sort _merge
	unique isin if _merge==1
	unique isin if _merge==2 
	unique isin if _merge==3 
	restore 

*drop bloomberg quotes after maturity
drop if _merge==2 & Maturity<date  // This is one isin that has the error

rename _merge _merge_bl



**********************************************
*** MERGE WITH CANDEAL
**********************************************
	
merge m:m  date exec_time_grid isin  using `tempCan' 

gen hour=hh(exec_time_grid)

	*check merge=1	
	count if _merge==1 & hour>=7 & hour<=16 
		*--> within CanDeal business hours --> these shoudl be there
	count if _merge==1 & (hour<7 | hour>16) 
		*--> out of CanDeal business hours ---> here CanDeal is not an outside option!
	

	*analyze which isins we now have 
	preserve
	collapse (max) _merge, by(isin)
	sort _merge
	unique isin if _merge==1 
		*--> 8 bonds are missing in CanDeal. 
		*Those are the 8 RBB bonds. This is because the CanDeal quotes do not include RBB bonds.
	unique isin if _merge==2 
		*-->no bond, never shows up OTC.
	unique isin if _merge==3 
	restore 
	
	*missing isins:
	preserve
	collapse IIROC_QUANTITY (min) date Maturity, by(isin _merge)
	bys isin: gen a0 = _N
	count  if a0==1 & _merge==1
	restore
	

*clean up maturity and days-to-maturity
replace settlement_date = date 							 if _merge==2
bys isin: egen a0 = max(Maturity)
format a0 %td
replace Maturity 			= a0                         if Maturity==.
replace days_to_maturity  	= Maturity - settlement_date if days_to_maturity==.
replace days_to_maturity_e 	= Maturity - date            if days_to_maturity_e==.
drop a0

rename _merge _merge_can
	
	



